More SMP guest support: boot secondary VCPUs via a new HYPERVISOR_boot_vcpu hypercall, move the event-channel pending selector into per-VCPU data, and make GDT teardown and scheduler registration operate per exec_domain.
set_in_cr4(X86_CR4_TSD);
}
- /*
- * Initialize the per-CPU GDT with the boot GDT,
- * and set up the GDT descriptor:
- */
- if (cpu) {
- cpu_gdt_descr[cpu].size = GDT_SIZE;
- cpu_gdt_descr[cpu].address = 0; /* XXXcl alloc page */
- BUG(); /* XXXcl SMP */
- memcpy((void *)cpu_gdt_descr[cpu].address,
- (void *)cpu_gdt_descr[0].address, GDT_SIZE);
- }
/*
* Set up the per-thread TLS descriptor cache:
*/
ENTRY(startup_32)
cld
- /* Set up the stack pointer */
- lss stack_start,%esp
-
/* Copy the necessary stuff from xen_start_info structure. */
mov $xen_start_info_union,%edi
mov $128,%ecx
rep movsl
+#ifdef CONFIG_SMP
+ENTRY(startup_32_smp)
+ cld
+#endif /* CONFIG_SMP */
+
+ /* Set up the stack pointer */
+ lss stack_start,%esp
+
checkCPUtype:
/* get vendor info */
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
+ incb ready
+
xorl %eax,%eax # Clear FS/GS and LDT
movl %eax,%fs
movl %eax,%gs
cld # gcc2 wants the direction flag cleared at all times
+#ifdef CONFIG_SMP
+ movb ready, %cl
+ cmpb $1,%cl
+ je 1f # the first CPU calls start_kernel
+ # all other CPUs call initialize_secondary
+ call initialize_secondary
+ jmp L6
+1:
+#endif /* CONFIG_SMP */
call start_kernel
L6:
jmp L6 # main should never return here, but
.long init_thread_union+THREAD_SIZE
.long __BOOT_DS
+ready: .byte 0
+
# XXXcl
.globl idt_descr
.globl cpu_gdt_descr
#include <mach_apic.h>
#endif
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
void __send_IPI_shortcut(unsigned int shortcut, int vector)
{
#if 1
- printk("__send_IPI_shortcut\n");
+ xxprint("__send_IPI_shortcut\n");
#else
/*
* Subtle. In the case of the 'never do double writes' workaround
void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
{
#if 1
- printk("send_IPI_mask_bitmask\n");
+ xxprint("send_IPI_mask_bitmask\n");
+ dump_stack();
#else
unsigned long mask = cpus_addr(cpumask)[0];
unsigned long cfg;
inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
{
#if 1
- printk("send_IPI_mask_sequence\n");
+ xxprint("send_IPI_mask_sequence\n");
#else
unsigned long cfg, flags;
unsigned int query_cpu;
leave_mm(cpu);
}
#if 1
- printk("smp_invalidate_interrupt ack_APIC_irq\n");
+ xxprint("smp_invalidate_interrupt ack_APIC_irq\n");
#else
ack_APIC_irq();
#endif
*/
send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+ xxprint("flush_tlb_others lockup");
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
mb();
cpu_clear(smp_processor_id(), cpu_online_map);
local_irq_disable();
#if 1
- printk("stop_this_cpu disable_local_APIC\n");
+ xxprint("stop_this_cpu disable_local_APIC\n");
#else
disable_local_APIC();
#endif
local_irq_disable();
#if 1
- printk("smp_send_stop disable_local_APIC\n");
+ xxprint("smp_send_stop disable_local_APIC\n");
#else
disable_local_APIC();
#endif
asmlinkage void smp_reschedule_interrupt(void)
{
#if 1
- printk("smp_reschedule_interrupt: ack_APIC_irq\n");
+ xxprint("smp_reschedule_interrupt: ack_APIC_irq\n");
#else
ack_APIC_irq();
#endif
int wait = call_data->wait;
#if 1
- printk("smp_call_function_interrupt: ack_APIC_irq\n");
+ xxprint("smp_call_function_interrupt: ack_APIC_irq\n");
#else
ack_APIC_irq();
#endif
#include <asm/desc.h>
#include <asm/arch_hooks.h>
-#if 0
+#if 1
+#define Dprintk(args...)
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+#else
#include <mach_apic.h>
#endif
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
-#if 0
/* Set if we find a B stepping CPU */
static int __initdata smp_b_stepping;
-#endif
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
void __init smp_alloc_memory(void)
{
#if 1
- printk("smp_alloc_memory\n");
+ int cpu;
+
+ xxprint("smp_alloc_memory\n");
+ for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ cpu_gdt_descr[cpu].address = (unsigned long)
+ alloc_bootmem_low_pages(PAGE_SIZE);
+ /* XXX free unused pages later */
+ }
#else
trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
/*
* a given CPU
*/
-#if 0
static void __init smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = cpu_data + id;
valid_k7:
;
}
-#endif
#if 0
/*
#undef NR_LOOPS
extern void calibrate_delay(void);
+#endif
static atomic_t init_deasserted;
-#endif
void __init smp_callin(void)
{
-#if 1
- printk("smp_callin\n");
-#else
int cpuid, phys_id;
unsigned long timeout;
+#if 0
/*
* If waken up by an INIT in an 82489DX configuration
* we may get here before an INIT-deassert IPI reaches
* lock up on an APIC access.
*/
wait_for_init_deassert(&init_deasserted);
+#endif
/*
* (This works even if the APIC is not enabled.)
*/
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ phys_id = smp_processor_id();
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
printk("huh, phys CPU#%d, CPU#%d already present??\n",
BUG();
}
+#if 0
/*
* the boot CPU has finished the init stage and is spinning
* on callin_map until we finish. We are free to set up this
* Get our bogomips.
*/
calibrate_delay();
+#endif
Dprintk("Stack at about %p\n",&cpuid);
/*
*/
smp_store_cpu_info(cpuid);
+#if 0
disable_APIC_timer();
local_irq_disable();
+#endif
/*
* Allow the master to continue.
*/
cpu_set(cpuid, cpu_callin_map);
+#if 0
/*
* Synchronize the TSC with the BP
*/
*/
int __init start_secondary(void *unused)
{
+ /*
+ * Dont put anything before smp_callin(), SMP
+ * booting is too fragile that we want to limit the
+ * things done here to the most necessary things.
+ */
+ cpu_init();
+ smp_callin();
+ while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
+ rep_nop();
#if 1
- printk("start_secondary\n");
+ if (0) {
+ char *msg = "start_secondary\n";
+ char *msg2 = "delay2\n";
+ int timeout;
+ (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg);
+ for (timeout = 0; timeout < 50000; timeout++) {
+ udelay(100);
+ if (timeout == 20000) {
+ (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
+ timeout = 0;
+ }
+ }
+ }
+ // enable_APIC_timer();
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ // local_flush_tlb();
+ cpu_set(smp_processor_id(), cpu_online_map);
+ wmb();
+ if (10) {
+ char *msg2 = "delay2\n";
+ int timeout;
+ for (timeout = 0; timeout < 50000; timeout++) {
+ udelay(1000);
+ if (timeout == 2000) {
+ (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
+ timeout = 0;
+ }
+ }
+ }
return cpu_idle();
#else
/*
void map_cpu_to_logical_apicid(void)
{
#if 1
- printk("map_cpu_to_logical_apicid\n");
+ xxprint("map_cpu_to_logical_apicid\n");
#else
int cpu = smp_processor_id();
int apicid = logical_smp_processor_id();
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
#if 1
- printk("wakeup_secondary_cpu\n");
+ xxprint("wakeup_secondary_cpu\n");
return 0;
#else
unsigned long send_status = 0, accept_status = 0;
extern cpumask_t cpu_initialized;
-#if 0
static int __init do_boot_cpu(int apicid)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
unsigned long boot_error;
int timeout, cpu;
unsigned long start_eip;
+#if 0
unsigned short nmi_high = 0, nmi_low = 0;
+#endif
+ full_execution_context_t ctxt;
+ extern void startup_32_smp(void);
+ extern void hypervisor_callback(void);
+ extern void failsafe_callback(void);
+ int i;
cpu = ++cpucount;
/*
panic("failed fork for CPU %d", cpu);
idle->thread.eip = (unsigned long) start_secondary;
/* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
+ start_eip = (unsigned long)startup_32_smp;
/* So we see what's up */
printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
atomic_set(&init_deasserted, 0);
+#if 1
+ if (cpu_gdt_descr[0].size > PAGE_SIZE)
+ BUG();
+ cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+ memcpy((void *)cpu_gdt_descr[cpu].address,
+ (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+ memset((char *)cpu_gdt_descr[cpu].address +
+ FIRST_RESERVED_GDT_ENTRY * 8, 0,
+ NR_RESERVED_GDT_ENTRIES * 8);
+
+ memset(&ctxt, 0, sizeof(ctxt));
+
+ ctxt.cpu_ctxt.ds = __USER_DS;
+ ctxt.cpu_ctxt.es = __USER_DS;
+ ctxt.cpu_ctxt.fs = 0;
+ ctxt.cpu_ctxt.gs = 0;
+ ctxt.cpu_ctxt.ss = __KERNEL_DS;
+ ctxt.cpu_ctxt.cs = __KERNEL_CS;
+ ctxt.cpu_ctxt.eip = start_eip;
+ ctxt.cpu_ctxt.esp = idle->thread.esp;
+ ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2);
+
+ /* FPU is set up to default initial state. */
+ memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+ /* Virtual IDT is empty at start-of-day. */
+ for ( i = 0; i < 256; i++ )
+ {
+ ctxt.trap_ctxt[i].vector = i;
+ ctxt.trap_ctxt[i].cs = FLAT_GUESTOS_CS;
+ }
+ ctxt.fast_trap_idx = 0;
+
+ /* No LDT. */
+ ctxt.ldt_ents = 0;
+
+ {
+ unsigned long va;
+ int f;
+
+ for (va = cpu_gdt_descr[cpu].address, f = 0;
+ va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+ va += PAGE_SIZE, f++) {
+ ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ protect_page(swapper_pg_dir, (void *)va, PROT_ON);
+ }
+ ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
+ flush_page_update_queue();
+ }
+
+ /* Ring 1 stack is the initial stack. */
+ ctxt.guestos_ss = __KERNEL_DS;
+ ctxt.guestos_esp = idle->thread.esp;
+
+ /* Callback handlers. */
+ ctxt.event_callback_cs = __KERNEL_CS;
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_cs = __KERNEL_CS;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+ ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
+
+ boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+ if (boot_error) {
+ /* Try to put things back the way they were before ... */
+ // unmap_cpu_to_logical_apicid(cpu);
+ cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
+ cpucount--;
+ }
+
+#else
Dprintk("Setting warm reset code and vector.\n");
store_NMI_vector(&nmi_high, &nmi_low);
/* mark "stuck" area as not stuck */
*((volatile unsigned long *)trampoline_base) = 0;
+#endif
return boot_error;
}
cycles_t cacheflush_time;
-#endif
unsigned long cache_decay_ticks;
-#if 0
static void smp_tune_scheduling (void)
{
* Cycle through the processors sending APIC IPIs to boot each.
*/
+#if 0
static int boot_cpu_logical_apicid;
#endif
/* Where the IO area was mapped on multiquad, always 0 otherwise */
static void __init smp_boot_cpus(unsigned int max_cpus)
{
-#if 1
- printk("smp_boot_cpus %d\n", max_cpus);
-#else
- int apicid, cpu, bit, kicked;
+ int cpu, kicked;
unsigned long bogosum = 0;
+#if 0
+ int apicid, bit;
+#endif
/*
* Setup boot CPU information
printk("CPU%d: ", 0);
print_cpu_info(&cpu_data[0]);
+#if 0
boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
boot_cpu_logical_apicid = logical_smp_processor_id();
x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+#else
+ // boot_cpu_physical_apicid = 0;
+ // boot_cpu_logical_apicid = 0;
+ x86_cpu_to_apicid[0] = 0;
+#endif
current_thread_info()->cpu = 0;
smp_tune_scheduling();
* If we couldn't find an SMP configuration at boot time,
* get out of here now!
*/
- if (!smp_found_config && !acpi_lapic) {
+ if (!smp_found_config /* && !acpi_lapic) */) {
printk(KERN_NOTICE "SMP motherboard not detected.\n");
smpboot_clear_io_apic_irqs();
+#if 0
phys_cpu_present_map = physid_mask_of_physid(0);
if (APIC_init_uniprocessor())
printk(KERN_NOTICE "Local APIC not detected."
" Using dummy APIC emulation.\n");
+#endif
map_cpu_to_logical_apicid();
return;
}
+#if 0
/*
* Should not be necessary because the MP table should list the boot
* CPU too, but we do it for the sake of robustness anyway.
}
verify_local_APIC();
+#endif
/*
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
- smp_found_config = 0;
+ HYPERVISOR_shared_info->n_vcpu = 1;
printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
smpboot_clear_io_apic_irqs();
+#if 0
phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
return;
}
+#if 0
connect_bsp_APIC();
setup_local_APIC();
map_cpu_to_logical_apicid();
* clustered apic ID.
*/
Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+#endif
+ Dprintk("CPU present map: %lx\n",
+ (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
kicked = 1;
- for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
- apicid = cpu_present_to_apicid(bit);
- /*
- * Don't even attempt to start the boot CPU!
- */
- if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
- continue;
-
- if (!check_apicid_present(bit))
- continue;
+ for (cpu = 1; kicked < NR_CPUS &&
+ cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
if (max_cpus <= cpucount+1)
continue;
- if (do_boot_cpu(apicid))
+ if (do_boot_cpu(cpu))
printk("CPU #%d not responding - cannot use it.\n",
- apicid);
+ cpu);
else
++kicked;
}
+#if 0
/*
* Cleanup possible dangling ends...
*/
smpboot_restore_warm_reset_vector();
+#endif
/*
* Allow the user to impress friends.
printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
}
+#if 0
if (nmi_watchdog == NMI_LOCAL_APIC)
check_nmi_watchdog();
if (cpu_has_tsc && cpucount && cpu_khz)
synchronize_tsc_bp();
#endif
+ xxprint("smp_boot_cpus done\n");
}
/* These are wrappers to interface to the new boot process. Someone
int __devinit __cpu_up(unsigned int cpu)
{
+ xxprint("__cpu_up\n");
/* This only works at boot for x86. See "rewrite" above. */
if (cpu_isset(cpu, smp_commenced_mask)) {
local_irq_enable();
cpu_set(cpu, smp_commenced_mask);
while (!cpu_isset(cpu, cpu_online_map))
mb();
+ xxprint("__cpu_up ok\n");
return 0;
}
void __init smp_cpus_done(unsigned int max_cpus)
{
#if 1
- printk("smp_cpus_done %d\n", max_cpus);
#else
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
void __init smp_intr_init(void)
{
#if 1
- printk("smp_intr_init\n");
+ xxprint("smp_intr_init\n");
#else
/*
* IRQ0 must be given a fixed assignment and initialized,
#include <asm-xen/xen-public/physdev.h>
#include <asm-xen/ctrl_if.h>
#include <asm-xen/hypervisor.h>
+#define XEN_EVTCHN_MASK_OPS
+#include <asm-xen/evtchn.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
EXPORT_SYMBOL(force_evtchn_callback);
int irq;
unsigned long flags;
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
local_irq_save(flags);
- while ( s->vcpu_data[0].evtchn_upcall_pending )
+ while ( vcpu_info->evtchn_upcall_pending )
{
- s->vcpu_data[0].evtchn_upcall_pending = 0;
+ vcpu_info->evtchn_upcall_pending = 0;
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
- l1 = xchg(&s->evtchn_pending_sel, 0);
+ l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
while ( (l1i = ffs(l1)) != 0 )
{
l1i--;
NULL
};
+/*
+ * Debug handler bound to VIRQ_DEBUG (see setup below): writes a marker
+ * line to the Xen console so a wedged guest can be poked from the
+ * hypervisor's debug key.
+ */
+static irqreturn_t xen_dbg(int irq, void *dev_id, struct pt_regs *regs)
+{
+ char *msg = "debug\n";
+ (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg);
+ return IRQ_HANDLED;
+}
+
+/*
+ * irqaction for the debug VIRQ; positional initialisers are, in order:
+ * handler, flags, cpu mask, name, dev_id, next.
+ */
+static struct irqaction xen_action = {
+ xen_dbg,
+ SA_INTERRUPT,
+ CPU_MASK_CPU0,
+ "xen-dbg",
+ NULL,
+ NULL
+};
+
void irq_suspend(void)
{
int pirq, virq, irq, evtchn;
(void)setup_irq(bind_virq_to_irq(VIRQ_MISDIRECT), &misdirect_action);
+ printk("debug_int\n");
+ (void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &xen_action);
+
/* This needs to be done early, but after the IRQ subsystem is alive. */
ctrl_if_init();
}
*/
#include "common.h"
+#include <asm-xen/evtchn.h>
/*
* These are rather arbitrary. They are fairly large because adjacent requests
#include <linux/interrupt.h>
#include <scsi/scsi.h>
#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
typedef unsigned char byte; /* from linux/ide.h */
#include <linux/poll.h>
#include <linux/irq.h>
#include <linux/init.h>
+#define XEN_EVTCHN_MASK_OPS
#include <asm-xen/evtchn.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
*/
#include "common.h"
+#include <asm-xen/evtchn.h>
static void netif_page_release(struct page *page);
static void netif_skb_release(struct sk_buff *skb);
setup_IO_APIC();
#endif
}
+
+
+#define smp_found_config (HYPERVISOR_shared_info->n_vcpu > 1)
#include <asm/segment.h>
#include <asm/cpufeature.h>
#include <asm-xen/hypervisor.h>
-#include <asm-xen/evtchn.h>
#ifdef __KERNEL__
#include <asm/ptrace.h>
#include <asm/synch_bitops.h>
#include <asm-xen/xen-public/event_channel.h>
+#include <linux/smp.h>
/*
* LOW-LEVEL DEFINITIONS
*/
-/* Force a proper event-channel callback from Xen. */
-void force_evtchn_callback(void);
-
/* Entry point for notifications into Linux subsystems. */
void evtchn_do_upcall(struct pt_regs *regs);
/* Entry point for notifications into the userland character device. */
void evtchn_device_upcall(int port);
+#ifdef XEN_EVTCHN_MASK_OPS
+
static inline void mask_evtchn(int port)
{
shared_info_t *s = HYPERVISOR_shared_info;
static inline void unmask_evtchn(int port)
{
shared_info_t *s = HYPERVISOR_shared_info;
+ vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
synch_clear_bit(port, &s->evtchn_mask[0]);
* a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
*/
if ( synch_test_bit (port, &s->evtchn_pending[0]) &&
- !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+ !synch_test_and_set_bit(port>>5, &vcpu_info->evtchn_pending_sel) )
{
- s->vcpu_data[0].evtchn_upcall_pending = 1;
- if ( !s->vcpu_data[0].evtchn_upcall_mask )
+ vcpu_info->evtchn_upcall_pending = 1;
+ if ( !vcpu_info->evtchn_upcall_mask )
force_evtchn_callback();
}
}
+#endif /* XEN_EVTCHN_MASK_OPS */
+
static inline void clear_evtchn(int port)
{
shared_info_t *s = HYPERVISOR_shared_info;
extern union xen_start_info_union xen_start_info_union;
#define xen_start_info (xen_start_info_union.xen_start_info)
+/* arch/xen/kernel/evtchn.c */
+/* Force a proper event-channel callback from Xen. */
+void force_evtchn_callback(void);
+
/* arch/xen/kernel/process.c */
void xen_cpu_idle (void);
return ret;
}
+/*
+ * Hypercall wrapper: ask Xen to bring virtual CPU 'vcpu' online with the
+ * initial register/GDT/pagetable state described by 'ctxt'.
+ * Returns the hypervisor's status code (0 on success).
+ * Hypercall number goes in %eax, arguments in %ebx/%ecx; the clobbered
+ * argument registers are captured in ign1/ign2.
+ */
+static inline int
+HYPERVISOR_boot_vcpu(
+ unsigned long vcpu, full_execution_context_t *ctxt)
+{
+ int ret;
+ unsigned long ign1, ign2;
+
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
+ : "memory");
+
+ return ret;
+}
+
#endif /* __HYPERVISOR_H__ */
/* The new domain's shared-info frame number. */
unsigned long shared_info_frame;
- unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */
+ unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
+ shared_info_t *shared_info = (shared_info_t *)shared_info_page;
/* A copy of the CPU context of the guest. */
full_execution_context_t ctxt;
}
}
- if ( xcio_read(ioctxt, &ctxt, sizeof(ctxt)) ||
- xcio_read(ioctxt, shared_info, PAGE_SIZE) )
+ if ( xcio_read(ioctxt, &ctxt, sizeof(ctxt)) ||
+ xcio_read(ioctxt, shared_info_page, PAGE_SIZE) )
{
xcio_error(ioctxt, "Error when reading from state file");
goto out;
ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
/* clear any pending events and the selector */
- memset(&(((shared_info_t *)shared_info)->evtchn_pending[0]),
- 0, sizeof (((shared_info_t *)shared_info)->evtchn_pending)+
- sizeof(((shared_info_t *)shared_info)->evtchn_pending_sel));
+ memset(&(shared_info->evtchn_pending[0]), 0,
+ sizeof (shared_info->evtchn_pending));
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ shared_info->vcpu_data[i].evtchn_pending_sel = 0;
/* Copy saved contents of shared-info page. No checking needed. */
ppage = xc_map_foreign_range(
* Relinquish GDT mappings. No need for explicit unmapping of the LDT as
* it automatically gets squashed when the guest's mappings go away.
*/
- destroy_gdt(d);
+ for_each_exec_domain(d, ed)
+ destroy_gdt(ed);
/* Relinquish every page of memory. */
relinquish_list(d, &d->xenpage_list);
.long SYMBOL_NAME(do_grant_table_op) /* 20 */
.long SYMBOL_NAME(do_vm_assist)
.long SYMBOL_NAME(do_update_va_mapping_otherdomain)
+ .long SYMBOL_NAME(do_boot_vcpu)
.rept NR_hypercalls-((.-hypercall_table)/4)
.long SYMBOL_NAME(do_ni_hypercall)
.endr
}
-void destroy_gdt(struct domain *d)
+/*
+ * Release the GDT frames of a single exec_domain (VCPU).  Previously this
+ * walked every exec_domain of the domain; callers now invoke it once per
+ * VCPU (see domain teardown and set_gdt).
+ */
+void destroy_gdt(struct exec_domain *ed)
 {
- struct exec_domain *ed;
 int i;
 unsigned long pfn;
- for_each_exec_domain(d, ed) {
- for ( i = 0; i < 16; i++ )
- {
- if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_pt[i])) != 0 )
- put_page_and_type(&frame_table[pfn]);
- ed->mm.perdomain_pt[i] = mk_l1_pgentry(0);
- }
+ /* Drop the reference on each mapped GDT frame and clear the slot. */
+ for ( i = 0; i < 16; i++ )
+ {
+ if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_pt[i])) != 0 )
+ put_page_and_type(&frame_table[pfn]);
+ ed->mm.perdomain_pt[i] = mk_l1_pgentry(0);
+ }
 }
unmap_domain_mem(vgdt);
/* Tear down the old GDT. */
- destroy_gdt(d);
+ destroy_gdt(ed);
/* Install the new GDT. */
for ( i = 0; i < nr_pages; i++ )
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
+#include <xen/sched.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/mm.h>
arch_do_createdomain(ed);
- sched_add_domain(d);
+ sched_add_domain(ed);
write_lock(&domlist_lock);
pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
}
else
{
- sched_add_domain(d);
+ sched_add_domain(ed);
}
return d;
return rc;
}
+extern xmem_cache_t *exec_domain_struct_cachep;
+
+/*
+ * do_boot_vcpu: handler for the __HYPERVISOR_boot_vcpu hypercall.
+ * Allocates a new exec_domain (VCPU) for the calling domain, copies the
+ * guest-supplied initial execution context into it, registers it with
+ * the scheduler and wakes it.  Returns 0 on success or a negative errno.
+ */
+long do_boot_vcpu(unsigned long vcpu, full_execution_context_t *ctxt)
+{
+    struct domain *d = current->domain;
+    struct exec_domain *ed;
+    int rc = 0;
+    full_execution_context_t *c;
+
+    /*
+     * 'vcpu' is guest-controlled and indexes d->exec_domain[]: bound it
+     * before use, and refuse to boot a VCPU that already exists.
+     * NB. all failure returns are negative errnos, matching the paths below.
+     */
+    if ( (vcpu >= MAX_VIRT_CPUS) || (d->exec_domain[vcpu] != NULL) )
+        return -EINVAL;
+
+    if ( alloc_exec_domain_struct(d, vcpu) == NULL )
+        return -ENOMEM;
+
+    if ( (c = xmalloc(sizeof(*c))) == NULL )
+    {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    if ( copy_from_user(c, ctxt, sizeof(*c)) )
+    {
+        rc = -EFAULT;
+        goto out;
+    }
+
+    printk("do_boot_vcpu for dom %d vcpu %lu\n", d->id, vcpu);
+
+    ed = d->exec_domain[vcpu];
+
+    atomic_set(&ed->pausecnt, 0);
+    shadow_lock_init(ed);
+
+    /* Start from the idle thread state, as domain creation does for VCPU0. */
+    memcpy(&ed->thread, &idle0_exec_domain.thread, sizeof(ed->thread));
+
+    /* Mirror the per-domain pagetable setup done by arch_do_createdomain. */
+    ed->mm.perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
+    memset(ed->mm.perdomain_pt, 0, PAGE_SIZE);
+    machine_to_phys_mapping[virt_to_phys(ed->mm.perdomain_pt) >>
+                            PAGE_SHIFT] = INVALID_P2M_ENTRY;
+
+    sched_add_domain(ed);
+
+    if ( (rc = arch_final_setup_guestos(ed, c)) != 0 )
+        goto out;
+
+    /* Set up the shared info structure. */
+    update_dom_time(d);
+
+    /* domain_unpause_by_systemcontroller */
+    if ( test_and_clear_bit(EDF_CTRLPAUSE, &ed->ed_flags) )
+        domain_wake(ed);
+
+    xfree(c);
+    return 0;
+
+ out:
+    if ( c != NULL )
+        xfree(c);
+    xmem_cache_free(exec_domain_struct_cachep, d->exec_domain[vcpu]);
+    d->exec_domain[vcpu] = NULL;
+    return rc;
+}
+
long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
{
if ( type > MAX_VMASST_TYPE )
page->u.inuse.type_info);
for_each_exec_domain ( d, ed ) {
- printk("Guest: CPU %d [has=%c] flags=%lx "
- "upcall_pend = %02x, upcall_mask = %02x\n",
+ printk("Guest: %p CPU %d [has=%c] flags=%lx "
+ "upcall_pend = %02x, upcall_mask = %02x\n", ed,
ed->processor,
test_bit(EDF_RUNNING, &ed->ed_flags) ? 'T':'F',
ed->ed_flags,
ed->vcpu_info->evtchn_upcall_pending,
ed->vcpu_info->evtchn_upcall_mask);
}
- printk("Notifying guest...\n");
+ ed = d->exec_domain[0];
+ printk("Notifying guest... %d/%d\n", d->id, ed->eid);
+ printk("port %d/%d stat %d %d %d\n",
+ VIRQ_DEBUG, d->virq_to_evtchn[VIRQ_DEBUG],
+ test_bit(d->virq_to_evtchn[VIRQ_DEBUG], &d->shared_info->evtchn_pending[0]),
+ test_bit(d->virq_to_evtchn[VIRQ_DEBUG], &d->shared_info->evtchn_mask[0]),
+ test_bit(d->virq_to_evtchn[VIRQ_DEBUG]>>5, &ed->vcpu_info->evtchn_pending_sel));
send_guest_virq(d->exec_domain[0], VIRQ_DEBUG);
}
}
ed->ed_sched_priv = &BVT_INFO(d)->ed_inf[ed->eid];
BVT_INFO(d)->ed_inf[ed->eid].inf = BVT_INFO(d);
+ BVT_INFO(d)->ed_inf[ed->eid].exec_domain = ed;
return 0;
}
ASSERT(inf != NULL);
ASSERT(d != NULL);
- inf->mcu_advance = MCU_ADVANCE;
- inf->domain = d->domain;
+ if (d->eid == 0) {
+ inf->mcu_advance = MCU_ADVANCE;
+ inf->domain = d->domain;
+ inf->warpback = 0;
+ /* Set some default values here. */
+ inf->warp = 0;
+ inf->warp_value = 0;
+ inf->warpl = MILLISECS(2000);
+ inf->warpu = MILLISECS(1000);
+ /* initialise the timers */
+ init_ac_timer(&inf->warp_timer);
+ inf->warp_timer.cpu = d->processor;
+ inf->warp_timer.data = (unsigned long)inf;
+ inf->warp_timer.function = &warp_timer_fn;
+ init_ac_timer(&inf->unwarp_timer);
+ inf->unwarp_timer.cpu = d->processor;
+ inf->unwarp_timer.data = (unsigned long)inf;
+ inf->unwarp_timer.function = &unwarp_timer_fn;
+ }
+
einf->exec_domain = d;
- inf->warpback = 0;
- /* Set some default values here. */
- inf->warp = 0;
- inf->warp_value = 0;
- inf->warpl = MILLISECS(2000);
- inf->warpu = MILLISECS(1000);
- /* initialise the timers */
- init_ac_timer(&inf->warp_timer);
- inf->warp_timer.cpu = d->processor;
- inf->warp_timer.data = (unsigned long)inf;
- inf->warp_timer.function = &warp_timer_fn;
- init_ac_timer(&inf->unwarp_timer);
- inf->unwarp_timer.cpu = d->processor;
- inf->unwarp_timer.data = (unsigned long)inf;
- inf->unwarp_timer.function = &unwarp_timer_fn;
-
+
if ( d->domain->id == IDLE_DOMAIN_ID )
{
einf->avt = einf->evt = ~0U;
xmem_cache_free(domain_struct_cachep, d);
}
-struct domain *alloc_domain_struct(void)
+/*
+ * Allocate and initialise the exec_domain (VCPU) structure for slot 'vcpu'
+ * of domain 'd', link it into the domain's ordered exec_domain list, and
+ * register it with the scheduler.  Returns the new exec_domain, or NULL
+ * on allocation/scheduler failure (slot is left empty in that case).
+ */
+struct exec_domain *alloc_exec_domain_struct(struct domain *d,
+ unsigned long vcpu)
 {
- struct domain *d;
- struct exec_domain *ed = NULL;
+ struct exec_domain *ed, *edc;
- if ( (d = xmem_cache_alloc(domain_struct_cachep)) == NULL )
- return NULL;
-
- memset(d, 0, sizeof(*d));
+ ASSERT( d->exec_domain[vcpu] == NULL );
 if ( (ed = xmem_cache_alloc(exec_domain_struct_cachep)) == NULL )
- goto out;
+ return NULL;
 memset(ed, 0, sizeof(*ed));
- d->exec_domain[0] = ed;
+ d->exec_domain[vcpu] = ed;
 ed->domain = d;
+ ed->eid = vcpu;
 if ( SCHED_OP(alloc_task, ed) < 0 )
 goto out;
+ /*
+ * Secondary VCPUs: hook up the shared-info slot and splice into the
+ * eid-ordered singly-linked list.  (VCPU0 is wired up by the caller.)
+ */
+ if (vcpu != 0) {
+ ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
+
+ /*
+ * Find the insertion point: edc is the last VCPU with eid < vcpu.
+ * The NULL-tail test guarantees the loop breaks before edc runs off
+ * the end of the list.
+ */
+ for_each_exec_domain(d, edc) {
+ if (edc->ed_next_list == NULL || edc->ed_next_list->eid > vcpu)
+ break;
+ }
+ ed->ed_next_list = edc->ed_next_list;
+ edc->ed_next_list = ed;
+
+ /* Place the new VCPU on the CPU after its predecessor's. */
+ if (test_bit(EDF_CPUPINNED, &edc->ed_flags)) {
+ ed->processor = (edc->processor + 1) % smp_num_cpus;
+ set_bit(EDF_CPUPINNED, &ed->ed_flags);
+ } else {
+ ed->processor = (edc->processor + 1) % smp_num_cpus; /* XXX */
+ }
+ }
+
+ return ed;
+
+ out:
+ /* Scheduler rejected the task: undo the slot assignment and free. */
+ d->exec_domain[vcpu] = NULL;
+ xmem_cache_free(exec_domain_struct_cachep, ed);
+
+ return NULL;
+}
+
+/*
+ * Allocate a zeroed domain structure together with its initial VCPU
+ * (exec_domain 0, created via alloc_exec_domain_struct).
+ */
+struct domain *alloc_domain_struct(void)
+{
+ struct domain *d;
+
+ if ( (d = xmem_cache_alloc(domain_struct_cachep)) == NULL )
+ return NULL;
+
+ memset(d, 0, sizeof(*d));
+
+ /* VCPU0 creation failure unwinds the domain allocation below. */
+ if ( alloc_exec_domain_struct(d, 0) == NULL )
+ goto out;
+
 return d;
 out:
- if ( ed )
- xmem_cache_free(exec_domain_struct_cachep, ed);
 xmem_cache_free(domain_struct_cachep, d);
 return NULL;
 }
/*
* Add and remove a domain
*/
-void sched_add_domain(struct domain *d)
+/*
+ * Register a single exec_domain (VCPU) with the scheduler.  Formerly took
+ * a whole domain and iterated its VCPUs; now called once per VCPU so that
+ * do_boot_vcpu can add secondary VCPUs individually.
+ */
+void sched_add_domain(struct exec_domain *ed)
 {
- struct exec_domain *ed;
+ struct domain *d = ed->domain;
- for_each_exec_domain(d, ed) {
- /* Must be unpaused by control software to start execution. */
- set_bit(EDF_CTRLPAUSE, &ed->ed_flags);
- }
+ /* Must be unpaused by control software to start execution. */
+ set_bit(EDF_CTRLPAUSE, &ed->ed_flags);
- if ( d->id != IDLE_DOMAIN_ID )
- {
- /* Initialise the per-domain timer. */
- init_ac_timer(&d->timer);
- d->timer.cpu = d->exec_domain[0]->processor;
- d->timer.data = (unsigned long)d;
- d->timer.function = &dom_timer_fn;
- }
- else
+ /* Per-domain state is only set up once, when VCPU0 is added. */
+ if (ed->eid == 0)
 {
- schedule_data[d->exec_domain[0]->processor].idle = d->exec_domain[0];
+ if ( d->id != IDLE_DOMAIN_ID )
+ {
+ /* Initialise the per-domain timer. */
+ init_ac_timer(&d->timer);
+ d->timer.cpu = ed->processor;
+ d->timer.data = (unsigned long)d;
+ d->timer.function = &dom_timer_fn;
+ }
+ else
+ {
+ schedule_data[ed->processor].idle = ed;
+ }
 }
- SCHED_OP(add_task, d->exec_domain[0]);
+ SCHED_OP(add_task, ed);
- TRACE_2D(TRC_SCHED_DOM_ADD, d->id, d);
+ TRACE_2D(TRC_SCHED_DOM_ADD, d->id, ed);
 }
void sched_rem_domain(struct domain *d)
#define GET_GDT_ENTRIES(_p) (((*(u16 *)((_p)->mm.gdt + 0))+1)>>3)
#define GET_GDT_ADDRESS(_p) (*(unsigned long *)((_p)->mm.gdt + 2))
-void destroy_gdt(struct domain *d);
+void destroy_gdt(struct exec_domain *d);
long set_gdt(struct exec_domain *d,
unsigned long *frames,
unsigned int entries);
#define __HYPERVISOR_grant_table_op 20
#define __HYPERVISOR_vm_assist 21
#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_boot_vcpu 23
/*
* MULTICALLS
u8 evtchn_upcall_pending;
u8 evtchn_upcall_mask;
u8 pad0, pad1;
+ u32 evtchn_pending_sel; /* 132 */
} PACKED vcpu_info_t;
/*
* word in the PENDING bitfield array.
*/
u32 evtchn_pending[32]; /* 4 */
- u32 evtchn_pending_sel; /* 132 */
u32 evtchn_mask[32]; /* 136 */
/*
/* These three operations must happen in strict order. */
if ( !test_and_set_bit(port, &s->evtchn_pending[0]) &&
!test_bit (port, &s->evtchn_mask[0]) &&
- !test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
+ !test_and_set_bit(port>>5, &ed->vcpu_info->evtchn_pending_sel) )
{
/* The VCPU pending flag must be set /after/ update to evtchn-pend. */
set_bit(0, &ed->vcpu_info->evtchn_upcall_pending);
#define IDLE_DOMAIN_ID (0x7FFFU)
#define is_idle_task(_p) (test_bit(DF_IDLETASK, &(_p)->d_flags))
+struct exec_domain *alloc_exec_domain_struct(struct domain *d,
+ unsigned long vcpu);
+
void free_domain_struct(struct domain *d);
struct domain *alloc_domain_struct();
#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
void schedulers_start(void);
-void sched_add_domain(struct domain *d);
+void sched_add_domain(struct exec_domain *d);
void sched_rem_domain(struct domain *d);
long sched_ctl(struct sched_ctl_cmd *);
long sched_adjdom(struct sched_adjdom_cmd *);